****************************************************************
******** Dutch Disease: Firm-level evidence from Indonesia *****
****************************************************************





********************************
*graph analysis 
********************************

*collapse Woodmac oil data by 1998 kab
use oil_district_merged_full_version2_old.dta, clear
rename  kode_kab code
merge m:1 code using code_1998.dta
collapse (sum)  OIL_PRODUCTION GAS_PRODUCTION TOTAL_PRODUCTION CAPEX_REAL OPEX_REAL, by( year code98)
drop if year==.
save oil_code98.dta, replace

use firms_indonesia.dta, clear
merge m:1 code98 year using oil_code98.dta
rename  _merge _merge_oil

merge m:1 code98 year using gdps_98codes.dta
rename  _merge _merge_gdp

merge m:1 code98 year using code98_wells.dta
rename  _merge _merge_wells
replace wells=0 if wells==.
replace non_dry=0 if non_dry==.

*merge discovery rates
merge m:1 code98 year using discovery_rates.dta
rename _merge _merge_disc


*---------------------------------------
*LUCK is not correlated with exploration
preserve
drop if year<1990
drop if year>2008
replace explor_wells=0 if explor_wells==.
histogram oil_gas_disc_rate, bin(10) start(-0.0001) fraction xtitle(Exploration wells' success rate across district-years) xsize(4) ysize(4)
***graph export "tex\hist_luck_test.eps", as(eps) preview(off) replace
histogram explor_wells  , bin(10) start(0) fraction xtitle(Exploration wells across district-years) xsize(4) ysize(4)
***graph export "tex\hist_wells_test.eps", as(eps) preview(off) replace
histogram explor_wells if explor_wells>0  , bin(10) start(0) fraction xtitle(Exploration wells across district-years) xsize(4) ysize(4)
***graph export "tex\hist_wells2_test.eps", as(eps) preview(off) replace
twoway (scatter oil_gas_disc_rate explor_wells) (lfit oil_gas_disc_rate explor_wells), xtitle(Exploration wells) ytitle(Success rate) xsize(4) ysize(4) legend(off)
***graph export "tex\scatter_luck_test.eps", as(eps) preview(off) replace
restore


*oil and gas production timeline
preserve
collapse (sum)  OIL_PRODUCTION GAS_PRODUCTION TOTAL_PRODUCTION, by(year)
tsset year
drop if year==.
keep if year>1989
drop if year>2008
*twoway (line OIL year) (line  GAS year, lpattern(dash) yaxis(2))
gen OIL=OIL_PRODUCTION/1000
gen GAS=(TOTAL_PRODUCTION-OIL_PRODUCTION)/1000
twoway (line OIL year) (line  GAS year, lpattern(dash) ytitle(Barrels per day (thousands)) xtitle("")) 
***graph export "tex\oil_gas_timeline_test.eps", as(eps) preview(off) replace
restore

*wells exploration and discovery per year
preserve
collapse (sum)  OIL_PRODUCTION GAS_PRODUCTION TOTAL_PRODUCTION wells non_dry (mean) success_rate gas_disc_rate oil_disc_rate explor_wells  oil_gas_disc_rate, by(year)
drop if year==.
keep if year>1989
drop if year>2008
gen OIL=OIL_PRODUCTION/1000
gen GAS=(TOTAL_PRODUCTION-OIL_PRODUCTION)/1000
gen TOTAL=TOTAL_PRODUCTION/1000
label var TOTAL "OIL and GAS (Thousand bdp)" 
label var oil_gas_disc_rate "Success rate"
tsset year
twoway (line TOTAL year if year<2007 & year>1994 , yaxis(2) xtick(#11) xlabel(#11)) (line l6.oil_gas_disc_rate year if year<2007 & year>1994, lpattern(dash)  ytitle(Success rate) xtitle("") )
***graph export "tex\success_timeline_test.eps", as(eps) preview(off) replace
restore

preserve
drop if year==.
keep if year>1989
drop if year>2006
collapse oil_gas_disc_rate explor_wells, by(year)
lab var oil_gas_disc_rate "Success rate"
lab var explor_wells "Avg nb of wells"
twoway (bar explor_wells year, xtitle("") ytitle(Avg nb of wells)) (line oil_gas_disc_rate year, yaxis(2)  ytitle(Success rate, axis(2))) if year>1990 & year<2009
***graph export "tex\success_timeline2_test.eps", as(eps) preview(off) replace
restore


*share of kab with oil or gas and luck
preserve
replace explor_wells=0 if explor_wells==.
drop if year==.
keep if year>1989
drop if year>2008
collapse (sum)  OIL_PRODUCTION GAS_PRODUCTION TOTAL_PRODUCTION  explor_wells  oil_gas_disc_rate, by( code98 year)
gen kabs=1
gen prod=0
replace prod=1 if TOTAL_PRODUCTION>0
gen explor=0
replace explor=1 if explor_wells>0
gen luck=0
replace luck=1 if  oil_gas_disc_rate >.2
collapse (sum)  prod explor luck kabs, by( year)
label var luck "With a >20% success rate" 
label var prod "With production"
label var kabs "Nb of districts" 
label var explor "With exploration"
foreach x in luck prod explor{
replace `x'=`x' / kabs
}
twoway (line prod year) (line explor year, lpattern(dash)) (line  luck year, lpattern(dot) ytitle(Share of Kabupatens) xtitle("")) 
***graph export "tex\kabs_timeline_test.eps", as(eps) preview(off) replace
restore


*maps of avg oil and gas production, and nb of firms, wages
preserve
drop if year==.
keep if year>1989
drop if year>2008
gen firm=1 if id!=.
bys code year: egen firms=sum(firm)
gen wage_rate= wages_total/ empl_total
collapse (sum)  OIL_PRODUCTION GAS_PRODUCTION TOTAL_PRODUCTION (mean) firms wage_rate, by( code )
rename  code KODE_KAB
save "tex\map_merge.dta", replace
***map file
*shp2dta using indonesia_kab,  database(indo_map) coordinates(indo_coord)
use indo_map, clear
merge 1:1 KODE_KAB using map_merge
spmap    TOTAL_PRODUCTION using indo_coord , id(_ID) fcolor(Blues2) ndfcolor(gs12) cln(10)  title(Oil and gas total production 1990-2008)
***graph export "tex\map_oil_gas_test.eps", as(eps) preview(off) replace
replace firms=int(firms)
replace wage_rate=int(wage_rate)
spmap    firms using indo_coord , id(_ID) fcolor(Blues2) ndfcolor(gs12) cln(5)  title(Average number of firms 1990-2008)
***graph export "tex\map_firms_test.eps", as(eps) preview(off) replace
spmap    wage_rate using indo_coord , id(_ID) fcolor(Blues2) ndfcolor(gs12) cln(5)  title(Average wage rate 1990-2008)
***graph export "tex\map_wages_test.eps", as(eps) preview(off) replace
restore


*nb firms per year
preserve
gen firms=.
replace firms=1 if id!=.
collapse (sum)  firms, by(year)
drop if year==.
keep if year>1989
drop if year>2008
tsline firms, ytitle(Number of firms) xtitle("")
***graph export "tex\firms_timeline_test.eps", as(eps) preview(off) replace
restore


*nb of firms by type and type of kab
preserve
drop if year<1990
drop if year>2006
gen entrant=0
replace entrant=1 if  justentered==1
gen exiter=0
replace exiter=1 if willexit==1
gen exit=0
bys id: egen maxyear=max(year)
replace exit=1 if year== maxyear
replace exiter=1 if exit==1
gen total=1
gen oil_gas=0
replace oil_gas=1 if TOTAL_PRODUCTION>0
collapse (sum)  exiter entrant total, by( oil_gas year)
gen share_entry= entrant/ total
gen share_exit= exiter/ total
replace share_entry=. if year==2006
*replace share_exit=. if year<1992
gen w1=  share_exit if oil_gas==1
gen w2=  share_exit if oil_gas==0
label var w1 "OIL or GAS"
label var w2 "No OIL nor GAS"
gen w11=  share_entry if oil_gas==1
gen w22=  share_entry if oil_gas==0
label var w11 "OIL or GAS"
label var w22 "No OIL nor GAS"
twoway (line  w11 year if year>1991) (line  w22 year if year>1991, lpattern(dash) ytitle(Firm entry rate) xtitle(""))
***graph export "tex\entry_rate_test.eps", as(eps) preview(off) replace
egen mean_w1=mean(w1) if year<2006 
egen mean_w2=mean(w2) if year<2006 
label var mean_w1 "Mean - OIL or GAS"
label var mean_w2 "Mean- No OIL nor GAS"
*twoway (line  w1 year if year<2006 ) (line  mean_w1 year if year<2006 ) (line  mean_w2 year if year<2006 ) (line  w2 year if year<2006 , lpattern(dash) ytitle(Firm exit rate) xtitle(""))
twoway (line  w1 year if year<2006 )  (line  w2 year if year<2006 , lpattern(dash) ytitle(Firm exit rate) xtitle(""))
***graph export "tex\exit_rate2_test.eps", as(eps) preview(off) replace
restore



*---------------------------------------


use firms_indonesia.dta, clear
merge m:1 code98 year using oil_code98.dta
rename  _merge _merge_oil

merge m:1 code98 year using gdps_98codes.dta
rename  _merge _merge_gdp

merge m:1 code98 year using code98_wells.dta
rename  _merge _merge_wells
replace wells=0 if wells==.
replace non_dry=0 if non_dry==.

*merge discovery rates
merge m:1 code98 year using discovery_rates.dta
rename _merge _merge_disc

*Avg wages etc... by type of kab
preserve
drop if year<1989
bys id year: gen trend=_n
drop if trend>1
drop trend
merge 1:1 id year using products
drop if _merge==2
drop _merge
merge 1:1 id year using quality_id
drop if _merge==2
drop _merge
merge m:1 year using oil_price
drop if _merge==2
drop _merge

gen wage_rate= wages_total/ empl_total
gen productivity=output/empl_total
gen unit_value=unit_value_core
gen real_wage=wage_rate/unit_value_core
gen real_wage2=wage_rate*100/wpi
gen wage_prod=  wages_prod/empl_prod 
gen wage_nonprod=  wages_nonprod/empl_nonprod  
gen cost_prod=   labcost_prod / empl_prod 
gen cost_nonprod= labcost_nonprod /empl_nonprod 

****************************
*two groups of firms (think foreign firms, State-owned, lucky kabs etc...)
****************************
gen public=1 if foreign_perc<10 & privdom_perc<50

gen oil_gas=0
replace oil_gas=1 if TOTAL_PRODUCTION>0

*extreme counterfactual
*drop if public==1
*drop if foreign_perc>10
bys id: egen maxwell=max( explor_wells)
keep if maxwell>0
bys id: egen mean_luck=mean( oil_gas_disc_rate )
replace oil_gas=0
replace oil_gas=1 if mean_luck>.9 & TOTAL_PRODUCTION>0

***************************************

drop if id==.
xtset id year
bys id: egen maxyear=max(year)
gen exit=0
replace exit=1 if year== maxyear
gen exiter=0
replace exiter=1 if exit==1
gen empl_growth=(f.empl_total-empl_total)/empl_total
gen wage_growth=(f.wage_rate-wage_rate)/wage_rate
gen employ=empl_total
gen outpoot=output

collapse (mean) productivity real_wage real_wage2 wpi unit_value wage_rate wage_prod wage_nonprod cost_prod cost_nonprod  employ empl_growth wage_growth output exiter, by(year oil_gas)

foreach x in productivity real_wage real_wage2  unit_value wage_rate wage_prod wage_nonprod employ output {
replace  `x'=ln(`x')
}
drop if year==.
keep if year>1989
drop if year>2005

gen non_prod_prod= wage_nonprod -wage_prod

gen w1=.
gen w2=.

lab var productivity "Output / Employees"
lab var output "Output"
lab var employ "Employees"
lab var wage_rate "Wage rate"
lab var wage_prod "Wage rate - Blue collar"
lab var wage_nonprod "Wage rate - White collar"
lab var non_prod_prod "White collar wage / Blue collar wage"
lab var exiter "Share of exiters"
lab var real_wage "Wage rate / unit value"
lab var unit_value "Unit value"
lab var real_wage2 "Wage rate / WPI"
lab var wpi "WPI"


foreach x in productivity  wage_rate real_wage2 wpi wage_prod wage_nonprod employ output exiter non_prod_prod{
local vtext : variable label `x' 
replace w1=`x'  if oil_gas==1
replace w2=`x'  if oil_gas==0
label var w1 "Luck in exploration" 
label var w2 "No luck"
*label var w1 "Oil and Gas" 
*label var w2 "No Oil and Gas"
twoway (line w1 year ) (line  w2 year , lpattern(dash) ytitle(`vtext')  xtitle("")) 
***graph export `x'_timeline2_test.eps, as(eps) preview(off) replace
}
*
foreach x in  real_wage unit_value {
local vtext : variable label `x' 
replace w1=`x'  if oil_gas==1
replace w2=`x'  if oil_gas==0
label var w1 "Luck in exploration" 
label var w2 "No luck"
*label var w1 "Oil and Gas" 
*label var w2 "No Oil and Gas"
twoway (line w1 year if year>1997) (line  w2 year  if year>1997, lpattern(dash) ytitle(`vtext')   xtitle("")) 
***graph export `x'_timeline2_test.eps, as(eps) preview(off) replace
}
*



*********
***products timeline
use regdata, clear
drop if year<1989
merge 1:1 id year using products
drop if _merge==2
gen oil_gas=0
replace oil_gas=1 if TOTAL_PRODUCTION>0
drop if year<1998
gen  nb_products2= nb_products
collapse (mean)  nb_products (sum)  nb_products2 , by(year oil_gas)
gen w1= nb_products  if oil_gas==1
gen w2= nb_products  if oil_gas==0
label var w1 "OIL or GAS"
label var w2 "No OIL nor GAS"
drop if year>2006
twoway (line w1 year ) (line  w2 year , lpattern(dash) ytitle(Average nb of products) xtitle(""))
***graph export "tex\prod_timeline_test.eps", as(eps) preview(off) replace
replace w1= nb_products2  if oil_gas==1
replace w2= nb_products2  if oil_gas==0
twoway (line w1 year ) (line  w2 year , lpattern(dash) ytitle(Total nb of products) xtitle(""))
***graph export "tex\prod_timeline2_test.eps", as(eps) preview(off) replace

**************


*scatter of wages vs oil-gas and success rate
preserve
gen wage_rate= wages_total/ empl_total
gen a_wage=asinh(wage_rate)
gen a_total=asinh(TOTAL_PRODUCTION)

gen success_rate_90s= success_rate if year>1989 & year<2000
bys code98: egen success_rate_1990s=mean(success_rate_90s)
gen success_rate_00s= success_rate if year>1999 & year<2010
bys code98: egen success_rate_2000s=mean(success_rate_00s)

gen wells_90s= wells if year>1989 & year<2000
bys code98: egen wells_1990s=mean(wells_90s)
gen wells_00s= wells if year>1999 & year<2010
bys code98: egen wells_2000s=mean(wells_00s)

gen wage_rate_00s= a_wage if year>1999 & year<2010
bys code98: egen wage_2000s=mean(wage_rate_00s)
bys id: egen wage_00s=mean(wage_rate_00s)
gen total_00s= a_total if year>1999 & year<2010
bys code98: egen total_2000s=mean(total_00s)

reg wage_2000s total_00s if year==2000, cluster(kab)
regplot total_00s, xtitle(Oil and gas production) ytitle(Wage rate) title(2000s) msymbol(circle_hollow) mcolor(blue) note("coef =.065,(kab-cluster)se =.019",   size(small) position(7) ring(0))
***graph export "tex\wages_total_2000s_test.eps", as(eps) preview(off) replace
reg wage_00s a_total if year==2000, cluster(kab)
regplot a_total, xtitle(Oil and gas production) ytitle(Wage rate) title(2000s) msymbol(circle_hollow) mcolor(blue) note("coef =.094,(kab-cluster)se =.026",   size(small) position(7) ring(0))
***graph export "tex\firm_wages_total_2000s_test.eps", as(eps) preview(off) replace

reg wage_2000s success_rate_2000s if year==2000, cluster(kab)
regplot , xtitle(Success rate) ytitle(Wage rate) title(2000s) msymbol(circle_hollow) mcolor(blue) note("coef =.426,(kab-cluster)se =.202",   size(small) position(7) ring(0))
***graph export "tex\wages_success_2000s_test.eps", as(eps) preview(off) replace
reg wage_00s success_rate_2000s if year==2000, cluster(kab)
regplot , xtitle(Success rate) ytitle(Wage rate) title(2000s) msymbol(circle_hollow) mcolor(blue) note("coef =.468,(kab-cluster)se =.191",   size(small) position(7) ring(0))
***graph export "tex\firm_wages_success_2000s_test.eps", as(eps) preview(off) replace

restore

*asymmetries across districts

preserve
collapse  TOTAL_PRODUCTION, by( year code98)
drop if year<1990
drop if year==.
egen id=group(code98)
gen prod=asinh( TOTAL_PRODUCTION)
xtset id year
gen chg= prod-l.prod
bys id: egen a=mean(prod)
twoway  (kdensity  chg if a>0 & year==1995)  (kdensity  chg if a>0 & year==2000, lpattern(dash)) (kdensity  chg if a>0 & year==2005, lpattern(dot)), xtitle(Total production growth) ytitle(Kernel density) legend(lab(1 1995) lab(2 2000) lab(3 2005) col(3))
***graph export "tex\changes_test.eps", as(eps) preview(off) replace
restore 


*industries table
merge m:1 isic using desc_merge
tabulate sector  if year==2000
*paste to excel and edit


***wage coeffs by sector
preserve
merge m:1 isic using desc_merge.dta
drop _merge
gen isic2=int( isic4/100)
merge m:1 isic2 using isic2
drop _merge
gen wage_rate= wages_total/ empl_total
gen a_wage=asinh(wage_rate)
gen a_total=asinh(TOTAL_PRODUCTION)

gen success_rate_90s= success_rate if year>1989 & year<2000
bys code98: egen success_rate_1990s=mean(success_rate_90s)
gen success_rate_00s= success_rate if year>1999 & year<2010
bys code98: egen success_rate_2000s=mean(success_rate_00s)

gen wells_90s= wells if year>1989 & year<2000
bys code98: egen wells_1990s=mean(wells_90s)
gen wells_00s= wells if year>1999 & year<2010
bys code98: egen wells_2000s=mean(wells_00s)

gen wage_rate_00s= a_wage if year>1999 & year<2010
bys code98: egen wage_2000s=mean(wage_rate_00s)
bys id: egen wage_00s=mean(wage_rate_00s)
gen total_00s= a_total if year>1999 & year<2010
bys code98: egen total_2000s=mean(total_00s)

gen population_1990s= asinh(population) if year>1989 & year<2000
bys code98: egen a_population_1990s=mean(population_1990s)

keep if year==2000

parmby "reg wage_2000s total_2000s a_population_1990s, cluster(code98)", by( isic_2d_name) saving(ols_by_2.dta, replace)

parmby "ivreg wage_2000s a_population_1990s (total_2000s=success_rate_1990s), cluster(code98)", by(isic_2d_name) saving(iv_by_2.dta, replace)


use ols_by_2.dta , clear
keep if parm=="total_2000s"
drop if dof<50
keep if  min95>0
gsort  -estimate
gen trend=_n
replace  isic_2d_name=subinstr( isic_2d_name, "Manufacture of","", .)
replace  isic_2d_name=subinstr( isic_2d_name, "manufacture of","", .)
replace  isic_2d_name=subinstr( isic_2d_name, "wood and of","", .)
replace  isic_2d_name=proper( isic_2d_name)
set obs 11
replace trend = 11 in 11

eclplot estimate min95 max95   trend, horizontal  ciopts(lcolor(blue)) plot(scatter  trend min95, msymbol(none) mlabel( isic_2d_name)  mlabposition(4) mlabsize(small)) scheme(s1color) ytitle("") ylabel(none)   xtitle("Effect of oil and gas on wages")
***graph export "tex\by_isic2_test.eps", as(eps) preview(off) replace





*****************************************************************
***cross section in growth rates with oil and gas booms and busts
*****************************************************************


use regdata, clear

keep if year>1989

gen isic2=int(isic4/100)
xtset id year
gen luck=non_dry/wells
gen ch_prod=TOTAL_PRODUCTION-l.TOTAL_PRODUCTION if year>1999
gen l_pop=ln(population)
gen dl_pop=l_pop-l.l_pop
gen og=0
replace og=1 if TOTAL_PRODUCTION>0
gen totprod2000s=TOTAL_PRODUCTION if year>1999
gen exit=0
bys id: egen maxyear=max(year)
replace exit=1 if year== maxyear


gen dl_wage_rate_2000s=dl_wage_rate if year>1999

collapse  ch_prod  og exit willexit dl_wage_rate luck dl_pop dl_empl_total dl_output dl_wage_nonprod dl_wage_prod TOTAL_PRODUCTION CAPEX_REAL OPEX_REAL privdom_perc foreign_perc dl_wage_rate_2000s totprod2000s (sum) wells non_dry, by(id code98 prov isic2)


label var  og "Oil and Gas District 2000s"
*label var  ogboom "  x boom"
*label var  ogbust "  x bust"
label var   luck "Discovery rate"
label var    dl_pop "Pop growth"
label var    dl_wage_rate "Wage growth"
label var    dl_wage_rate_2000s "Wage growth 2000s"
label var exit "Exit"


gen ogprod=asinh(totprod2000s)
lab var ogprod "Oil and Gas 2000s"
gen ogprod2=asinh(TOTAL_PRODUCTION)
lab var ogprod2 "Oil and Gas"


eststo clear

eststo: reg  dl_wage_rate  luck  , cluster(code98)
eststo: reghdfe  dl_wage_rate  luck   ,  absorb( isic2) cluster(code98)
eststo: reg  dl_wage_rate_2000s  luck   ,   cluster(code98)
eststo: reghdfe  dl_wage_rate_2000s  luck   ,  absorb( isic2) cluster(code98)
eststo: reg  exit  luck   ,   cluster(code98)
eststo: reghdfe  exit  luck   ,  absorb( isic2) cluster(code98)
***esttab using tex\wage_luck_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2) nonote addnotes(\tiny Kabubpaten-clustered s.e. in parenthesis. * 0.10 ** 0.05 *** 0.01.  Columns 2,4, and 6 includes industry fixed effects.)  

merge m:1 isic2 using isic2.dta
drop _merge

parmby "reg exit luck, cluster(code98)", by( isic_2d_name) saving(ols_luck.dta, replace)
parmby "reg dl_wage_rate luck, cluster(code98)", by( isic_2d_name) saving(ols_luck2.dta, replace)
preserve
use ols_luck.dta , clear
keep if parm=="luck"
keep if  min95>0
gsort  -estimate
gen trend=_n
replace  isic_2d_name=subinstr( isic_2d_name, "Manufacture of","", .)
replace  isic_2d_name=subinstr( isic_2d_name, "manufacture of","", .)
replace  isic_2d_name=subinstr( isic_2d_name, "wood and of","", .)
replace  isic_2d_name=proper( isic_2d_name)
eclplot estimate min95 max95   trend, horizontal  ciopts(lcolor(blue)) plot(scatter  trend min95, msymbol(none) mlabel( isic_2d_name)  mlabposition(4) mlabsize(small)) scheme(s1color) ytitle("") ylabel(none)   xtitle("Effect of exploration luck on firm exit")


use ols_luck2.dta , clear
keep if parm=="luck"
keep if  estimate>0
replace estimate=. if isic_2d_name==""
replace min95=. if isic_2d_name==""
replace max95=. if isic_2d_name==""
gsort  -estimate
gen trend=_n
replace  isic_2d_name=subinstr( isic_2d_name, "Manufacture of","", .)
replace  isic_2d_name=subinstr( isic_2d_name, "manufacture of","", .)
replace  isic_2d_name=subinstr( isic_2d_name, "wood and of","", .)
replace  isic_2d_name=proper( isic_2d_name)
replace  isic_2d_name="Products of Wood, Cork, and Straw"  if isic_2d_name=="   Products Of Wood And Cork, Except Furniture;  Articles Of Straw And Plaiting Materials"
replace  isic_2d_name="Leather Luggage, Handbags, Saddlery, Harness And Footwear"  if isic_2d_name==" Tanning And Dressing Of Leather;  Luggage, Handbags, Saddlery, Harness And Footwear"
eclplot estimate min95 max95   trend, horizontal  ciopts(lcolor(blue)) plot(scatter  trend estimate, msymbol(none) mlabel( isic_2d_name)  mlabposition(5) mlabsize(small)) scheme(s1color) ytitle("") ylabel(none) xline(0) xsize(6) ysize(4)   xtitle("Effect of exploration luck on wage growth")
***graph export "tex\industry_luck_test.eps", as(eps) preview(off) replace

restore




*********************************************************************************************************
*********************************************************************************************************
*********************************************************************************************************

***********************************************PANEL REGRESSIONS*****************************************

*********************************************************************************************************
*********************************************************************************************************
*********************************************************************************************************

use regdata, clear
drop if year<1990


drop oil_gas_disc_rate
*merge discovery rates
merge m:1 code98 year using discovery_rates.dta
drop _merge

drop if id==.
xtset id year
*true digging
gen l1wells=l.wells
foreach x in 2 3 4 5 6 7 8 9 10 11 12 13 14 15{
gen l`x'wells=l`x'.wells
}
*
gen luck=oil_gas_disc_rate
bys id: egen mean_luck=mean( oil_gas_disc_rate )
gen l1luck=l.luck
foreach x in 2 3 4 5 6 7 8 9 10 11 12 13 14 15{
gen l`x'luck=l`x'.luck
}
*
gen max_wells= max(wells,l1wells, l2wells,l3wells , l4wells,l5wells, l6wells,l7wells , l8wells,l9wells, l10wells,l11wells, l12wells, l13wells, l14wells,l15wells )


gen max_luck= max(l5luck, l6luck,l7luck)


*merge products, quality_id
*product data is 1998-2008

merge 1:1 id year using products
drop if _merge==2
drop _merge
merge 1:1 id year using quality_id
drop if _merge==2
drop _merge
merge m:1 year using oil_price
drop if _merge==2
drop _merge
*merge m:1 code98 year using transfers
*drop if _merge==2
*drop _merge

*gen oil_gas_transfer=asinh( revshoil+ revshgas)
*lab var oil_gas_transfer "Oil and gas transfer"

*luck in previous years
*replace Z= max( success_rate, l1_success_rate, l2_success_rate,l3_success_rate)
*replace Z= max(success_rate, l1_success_rate, l2_success_rate,l3_success_rate, l4_success_rate,l5_success_rate , l6_success_rate, l7_success_rate,l8_success_rate, l9_success_rate,l10_success_rate  )

replace Z=max_luck
label var  Z "Max Discovery rate (-15Y)"
*only true luck, no fake zeros
replace Z=. if max_wells==0
replace Z=. if max_wells==.

gen iv2= ln(crude_price_2010_dollars)* mean_luck


gen exit=0
bys id: egen maxyear=max(year)
replace exit=1 if year== maxyear

gen X=a_TOTAL_PRODUCTION


*Dummy for inclusion depending on entry and exit. We now throw out only entry ones, so the number of firms in this group goes down
egen mwillexit=mean(willexit), by(id)
egen mjustentered=mean(justentered), by(id)
egen mjustentered_1=mean(justentered_1), by(id)
egen mjustentered_3=mean(justentered_3), by(id)
egen mjustentered_c=mean(justentered_c), by(id)

gen Dincl=1
replace Dincl=0 if mjustentered>0 & mjustentered!=.
replace Dincl=0 if mjustentered_1>0 & mjustentered_1!=.
replace Dincl=0 if mjustentered_3>0 & mjustentered_3!=.
replace Dincl=0 if mjustentered_c>0 & mjustentered_c!=.
replace Dincl=0 if mwillexit>0 & mwillexit!=.

gen Dnincl=.
replace Dnincl=1 if Dincl==0
replace Dnincl=0 if Dincl==1

*tab D*incl

save regdata_temp, replace


***graphs
use  regdata_temp, replace
bys kabupaten year: gen trendy=_n



bys  year: egen mean_kab_luck=mean(oil_gas_disc_rate)
bys  year: egen min_kab_luck=min(oil_gas_disc_rate)
bys  year: egen max_kab_luck=max(oil_gas_disc_rate)
bys  year: egen sd_kab_luck=sd(oil_gas_disc_rate)
replace min_kab_luck= mean_kab_luck-1.96*sd_kab_luck/sqrt(37)
replace max_kab_luck= mean_kab_luck+1.96*sd_kab_luck/sqrt(37)

twoway  (rarea min_kab_luck max_kab_luck year) (line mean_kab_luck  year) (line crude_price_2010_dollars year, yaxis(2) )if trendy==1 ,  play(discoveries) 
graph play disc2.grec
***graph export discovery_rate_time.pdf, as(pdf) replace

bys  year: egen mean_kab_prod=mean( TOTAL_PRODUCTION)
bys  year: egen min_kab_prod=min( TOTAL_PRODUCTION)
bys  year: egen max_kab_prod=max( TOTAL_PRODUCTION)
bys  year: egen sd_kab_prod=sd( TOTAL_PRODUCTION)
replace min_kab_prod= mean_kab_prod-1.96*sd_kab_prod/sqrt(37)
replace max_kab_prod= mean_kab_prod+1.96*sd_kab_prod/sqrt(37)

twoway  (rarea min_kab_prod max_kab_prod year) (line mean_kab_prod  year) (line crude_price_2010_dollars year, yaxis(2) )if trendy==1 , play(discoveries)
*edit
***graph export production_time.pdf, as(pdf) replace

by year: egen sum_exp_wells=sum(explor_wells)

*
*-------------------------------------------------------------------------
*Firm and year FE



use regdata_temp, clear
drop if year>2005
gen oil_gas=0
bys kab: egen oil=mean(TOTAL_PRODUCTION)
replace oil_gas=1 if oil>0 & oil!=.
gen iv_bunny=oil_gas*ln(crude_price_2010_dollars)
lab var iv_bunny  "Non-dry district x Oil price"
gen ln_inv=asinh( inv_total)
gen prod_chg=1- status_quo
replace X=a_TOTAL_PRODUCTION

bys kab: egen sum_oil=sum(TOTAL_PRODUCTION)

gen iv_prod=sum_oil/1000000*ln(crude_price_2010_dollars)

sort id year
by id: gen goofie=_n


*labor prod 1990
gen lprod_1990=Y_L if  year<2000 & goofie<4
bys id: egen mean_lprod_1990 =mean(lprod_1990 )
replace mean_lprod_1990=asinh(mean_lprod_1990)
lab var mean_lprod_1990 "1990s labor productivity"
gen pop1990= mean_lprod_1990*iv2
lab var pop1990  "$\times$ 1990s labor productivity"

gen testprod=empl_prod_total if empl_prod_total>=0
gen testnonprod=empl_nonprod_total if empl_nonprod_total>=0



replace empl_nonprod_total=1 if empl_nonprod_total<0
replace empl_prod_total=1 if empl_prod_total<0


*skill intensity
gen skill_intensity=ln(empl_nonprod_total)-ln(empl_prod_total) if year==1990
bys id: egen firm_skill=mean(skill_intensity)
bys isic4: egen sector_skill=mean(skill_intensity)
gen firm_skill_iv2=iv2*firm_skill
gen sector_skill_iv2=iv2*sector_skill
lab var sector_skill_iv2 "$\times$ sector skill intensity"
lab var firm_skill_iv2  "$\times$ 1990 skill intensity"



*price variation within a product_code nationally in 1990s
gen a_price=asinh(unit_value_core)
gen aprice1990s=a_price if year<2000
bys product_code: egen sector_price_sd=sd(aprice1990s)
gen sector_price_iv2=iv2*sector_price_sd
lab var sector_price_iv2 "$\times$ price std. dev."

*labor intensity
gen labor_intensity=ln( wages_prod + wages_nonprod )-ln(machine) if year<2000 & goofie<4
bys id: egen labor_int=mean(labor_intensity)
replace labor_intensity =iv2*labor_int
lab var labor_intensity "$\times$ 1990s labor intensity"
*lab var sector_labor_int_iv2 "$\times$ sector labor intensity"




*tradability
replace export_perc=export_perc/100
gen exp_share=export_perc  if year<2000 & goofie<4
bys id: egen exporter_share=mean(exp_share)
gen exp_iv2=iv2*exporter_share
lab var exp_iv2 "$\times$ 1990s export share"




*replace X=oil_gas_transfer
label var X "Oil and gas"
bys kabupaten: egen non_dry_kab=sum(X)
*replace non_dry_kab=1 if non_dry_kab>0
gen iv2_intense= ln(crude_price_2010_dollars)* non_dry
*keep if Dnincl!=`incl'
gen a_real_wage=asinh(100*wage_rate/wpi)
gen a_real_wage2=asinh(wage_rate/unit_value_core)
gen aint=asinh( intermediates)
*xi i.year
bys year: gen time_trend=_n
gen iv2_placebo= time_trend* mean_luck

gen amachines=asinh(machine)

*gen a_price=asinh(uv_core)
replace Y_L=asinh(Y_L)
label var  Z "Max Discovery rate (-6Y)"
lab var a_wage_rate "Wage"
lab var a_real_wage "Wage / WPI"
lab var a_real_wage2 "Wage / Unit value"
lab var a_empl_total "Empl"
lab var a_price "Unit value"
lab var exit "Exit"
lab var wpi "WPI"
lab var iv2 "Avg success rate x Oil price"
lab var iv2_intense "Total production x Oil price"
lab var iv2_placebo "Avg success rate x Time trend"
lab var net_drop "Product drop"
lab var export "Exporter"
lab var Y_L "Labor Prod."
lab var a_wages_nonprod "Wage non-production"
lab var a_wages_prod "Wage production"
lab var a_output "Output" 
lab var ln_inv "Investment"
lab var mat_imp_sh "Share imported inputs"
lab var prod_chg "Product churn"
lab var willexit "Exit in 2 years"
lab var aint "Intermediate inputs"
lab var justentered_c "Entry"
lab var product_drop "Product drop"
lab var product_intro "Product intro"
lab var amachines "Machines"

reg a_wage_rate    product_drop
capture drop e_smpl
predict e_smpl if e(sample), res

egen ind_year=group(isic year)

*

*reghdfe a_wage_rate   iv2   if e_smpl!=.  , absorb(FE_YEAR=i.id  FE_FIRM=i.ind_year) cluster(id kab_year)
*capture drop e_smpl2
*predict e_smpl2 if e(sample), res

eststo clear


ogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote  

*keep if mean_lprod_1990!=.
eststo clear
*iv_bunny iv2_intense iv_prod
foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
eststo: reghdfe `y'  iv_prod    if e_smpl!=.   , absorb(id ind_year) cluster(kabupaten) 
***esttab using tex\production_instead_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
}
eststo clear


foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
eststo: reghdfe `y'  iv2     if e_smpl!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table2_A`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  exit justentered_c product_drop product_intro  a_output a_empl_total    {
eststo: reghdfe `y' iv2       if e_smpl!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table3_A`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  Y_L export_perc mat_imp_sh  aint amachines   {
eststo: reghdfe `y' iv2       if e_smpl!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table4_A`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
*******

eststo clear


foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
eststo: reghdfe `y'  iv2   iv2_placebo  if e_smpl!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table2_A2`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  exit justentered_c product_drop product_intro  a_output a_empl_total    {
eststo: reghdfe `y' iv2  iv2_placebo     if e_smpl!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table3_A2`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  Y_L export_perc mat_imp_sh  aint amachines   {
eststo: reghdfe `y' iv2   iv2_placebo    if e_smpl!=.    , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table4_A2`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
*******
eststo clear


foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
eststo: reghdfe `y'  iv2     if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table2_A3`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  exit justentered_c product_drop product_intro  a_output a_empl_total    {
eststo: reghdfe `y' iv2       if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table3_A3`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  Y_L export_perc mat_imp_sh  aint amachines   {
eststo: reghdfe `y' iv2       if e_smpl!=. & labor_intensity!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table4_A3`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
*******

foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
eststo: reghdfe `y'  iv2  pop1990   if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table2_B`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  exit justentered_c product_drop product_intro  a_output a_empl_total    {
eststo: reghdfe `y' iv2   pop1990    if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table3_B`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  Y_L export_perc mat_imp_sh  aint amachines   {
eststo: reghdfe `y' iv2  pop1990     if e_smpl!=. & labor_intensity!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table4_B`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
*******


foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
eststo: reghdfe `y'  iv2  labor_intensity   if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table2_C`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  exit justentered_c product_drop product_intro  a_output a_empl_total    {
eststo: reghdfe `y' iv2   labor_intensity    if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table3_C`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  Y_L export_perc mat_imp_sh  aint amachines   {
eststo: reghdfe `y' iv2  labor_intensity     if e_smpl!=. & labor_intensity!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table4_C`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear

foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
eststo: reghdfe `y'  iv2  exp_iv2   if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table2_D`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  exit justentered_c product_drop product_intro  a_output a_empl_total    {
eststo: reghdfe `y' iv2   exp_iv2    if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table3_D`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  Y_L export_perc mat_imp_sh  aint amachines   {
eststo: reghdfe `y' iv2  exp_iv2     if e_smpl!=. & labor_intensity!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table4_D`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear

foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
eststo: reghdfe `y'  iv2  firm_skill_iv2   if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table2_E`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  exit justentered_c product_drop product_intro  a_output a_empl_total    {
eststo: reghdfe `y' iv2   firm_skill_iv2    if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table3_E`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear
foreach y in  Y_L export_perc mat_imp_sh  aint amachines   {
eststo: reghdfe `y' iv2  firm_skill_iv2     if e_smpl!=. & labor_intensity!=.   , absorb(id ind_year) cluster(kabupaten) 
}
***esttab using tex\Table4_E`incl'_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote 
eststo clear

*mean_lprod_1990 firm_skill labor_int exporter_share
* pop1990  firm_skill_iv2 labor_intensity exp_iv2
*foreach x in  exit a_wage_rate a_price a_output Y_L {
gen mean_lprod_1990rhs=pop1990
gen labor_intrhs=labor_intensity
gen  exporter_sharerhs= exp_iv2
gen firm_skillrhs=firm_skill_iv2

lab var mean_lprod_1990 "Labor productivity in 1990s" 
lab var labor_int "Labor intensity in 1990s" 
lab var exporter_share  "Export share in 1990s" 

*----------------------------------------
reg a_wage_rate    product_drop
capture drop e_smpl
predict e_smpl if e(sample), res


save temp_th, replace

*------------------------------------
*Torfinn december 13 2018:
use temp_th, clear
foreach y in mean_lprod_1990  labor_int exporter_share firm_skill {
*preserve
capture drop res_`y'
reghdfe `x'   `y'rhs iv2   if e_smpl!=.  & labor_intensity!=. , absorb(id ind_year)  cluster(kabupaten ) residuals(res_`y')
*reg `x'   `y'rhs iv2   if e_smpl!=.  & labor_intensity!=.
sum mean_lprod_1990 labor_int exporter_share firm_skill `y'rhs if res_`y'!=., d
}

egen antall_id=group(id)
sum antall*

foreach y in mean_lprod_1990 {
keep if res_`y'!=.
keep mean_lprod_1990 labor_int exporter_share firm_skill id
duplicates drop
sum mean_lprod_1990 labor_int exporter_share firm_skill, d
}


estpost tabstat mean_lprod_1990 labor_int exporter_share firm_skill, statistics(N mean sd p25 p75 min max) columns(statistics)
esttab . using "hetero_descstat.csv", replace cells("count mean(fmt(a2)) sd(fmt(a2)) p25(fmt(a2)) p75(fmt(a2)) min(fmt(a2)) max(fmt(a2))") /// 
label nomtitles nonumbers noobs width(\hsize) 



*****************************





foreach char in mean_lprod_1990  labor_int exporter_share firm_skill {
*foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
foreach y in  a_wage_rate   {
eststo: reghdfe `y'  iv2  `char'rhs   if e_smpl!=. & labor_intensity!=.  , absorb(id ind_year) cluster(kabupaten) 
}
}

foreach y in mean_lprod_1990 labor_int exporter_share firm_skill {
sum `y' if res_`y'!=., d
}
keep if mean_lprod_1990!=.
keep mean_lprod_1990 labor_int exporter_share firm_skill
duplicates drop

sum mean_lprod_1990 labor_int exporter_share firm_skill, d


gen D1=1

foreach y in mean_lprod_1990  labor_int exporter_share firm_skill {
foreach stat in sd p25 p75 min max { 
gen `y'_`stat'=`y'
}
}

collapse (sum) D1 (mean) mean_lprod_1990 labor_int exporter_share firm_skill (sd) mean_lprod_1990_sd labor_int_sd exporter_share_sd firm_skill_sd (p25) mean_lprod_1990_p25 labor_int_p25 exporter_share_p25 firm_skill_p25 (p75) mean_lprod_1990_p75 labor_int_p75 exporter_share_p75 firm_skill_p75 (min) mean_lprod_1990_min labor_int_min exporter_share_min firm_skill_min (max) mean_lprod_1990_max labor_int_max exporter_share_max firm_skill_max


*-----------------------------------




foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price    {
*foreach char in mean_lprod_1990 labor_intrhs exporter_sharerhs firm_skillrhs if e_smpl_descstat {
foreach char in mean_lprod_1990 labor_int exporter share{
foreach xiv in pop1990 export_perc labor_intensity exp_iv2 firm_skill_iv2 {
reg `y'  iv2  `xiv'  if e_smpl!=. & `char'!=.
capture drop e_smpl_descstat
predict e_smpl_descstat if e(sample), res
sum `char' if e_smpl_descstat!=.

}
}
sum mean_lprod_1990 labor_intrhs exporter_sharerhs firm_skillrhs if e_smpl_descstat!=.








sum mean_lprod_1990 if e_smpl!=.


stop

foreach x in  exit  a_wage_rate a_price a_output Y_L  {
foreach y in mean_lprod_1990  labor_int exporter_share firm_skill {
preserve
reghdfe `x'   `y'rhs iv2   if e_smpl!=.  & labor_intensity!=. , absorb(id ind_year)  cluster(kabupaten )
matrix b=e(b)
matrix V=e(V)
scalar b1=b[1,1]
scalar b2=b[1,2]
scalar varb1=V[1,1]
scalar varb2=V[2,2]
scalar covb1b2=V[1,2]
gen beta`x'=b2 + `y'* b1
gen se`x'=sqrt(varb2+ varb1*(`y'^2) + 2*(`y')*covb1b2)
gen upper`x'=beta`x'+1.96*se`x'
gen lower`x'=beta`x'-1.96*se`x'
collapse (mean)  beta* upper* lower* mean_lprod_1990  labor_int exporter_share firm_skill  exit  a_wage_rate a_price a_output Y_L , by(id)
lab var mean_lprod_1990 "Labor productivity in 1990s" 
lab var labor_int "Labor intensity in 1990s" 
lab var exporter_share  "Export share in 1990s" 
lab var firm_skill  "Skill intensity in 1990s"
lab var exit   "Exit"
lab var a_wage_rate  "Wage"
lab var  a_price  "Unit value"
lab var   a_output "Output"
lab var    Y_L "Labor productivity"
graph twoway (lfit beta`x' `y')  (qfit lower`x' `y', lpattern(dash))   (qfit upper`x' `y', lpattern(dash)) (kdensity `y', yaxis(2)),  yline(0)  ytitle("Effect of Avg success rate x Oil price") xsize(4) ysize(4) xtitle(`: variable label `y'') title(`: variable label `x'') legend(off) scheme(Plotplainblind) play(grint_sulawesi)
graph save Graph grint`x'`y'.gph, replace
***graph export "tex\gr_`x'`y'_test.eps", as(eps) preview(off) replace
restore
}
}
*



stop

*


gen isic2=int(isic4/100)
merge m:1 isic2 using isic2.dta
drop _merge


save reg_temp, replace

*	foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price exit justentered_c product_drop product_intro  a_output a_empl_total    Y_L export_perc mat_imp_sh  aint  {
	foreach y in  a_wage_rate   {
	parmby "reghdfe `y'   iv2 pop1990  if e_smpl!=.  , absorb(id year) cluster(kabupaten) ", by( isic_2d_name) saving(rf_0.dta, replace)
*	preserve
	use rf_0.dta, clear
	egen s=group(isic_2d_name)
	drop if parmseq==2

	gen estimate0=estimate
	label var estimate0 "Effect on `y'"
	
	keep s estimate0
	sort s
	save rf_0.dta, replace
	}


use reg_temp, clear
	foreach y in  a_wage_rate  exit   a_output  {
	*foreach y in  a_wage_rate a_wages_prod a_wages_nonprod   a_price exit justentered_c product_drop product_intro  a_output a_empl_total    Y_L export_perc mat_imp_sh  aint  {
	*parmby "reghdfe `y'   iv2 pop1990  if e_smpl!=.  , absorb(id year) cluster(id kab_year) ", by( isic_2d_name) saving(rf_`y'.dta, replace)
		parmby "reghdfe `y'   iv2   if e_smpl!=.  , absorb(id year) cluster(kabupaten) ", by( isic_2d_name) saving(rf_`y'.dta, replace)

	preserve
	use rf_`y'.dta , clear
	*drop if parmseq==2
	egen s=group(isic_2d_name)
	sort s
	merge 1:1 s using rf_0.dta
	
	*keep if  estimate>0
	keep if dof>50
	replace estimate=. if isic_2d_name==""
	replace min95=. if isic_2d_name==""
	replace max95=. if isic_2d_name==""

	drop if isic_2d_name==""
	drop if estimate==.

	gsort  -estimate
	gen trend=_n
	replace  isic_2d_name=subinstr( isic_2d_name, "Manufacture of","", .)
	replace  isic_2d_name=subinstr( isic_2d_name, "manufacture of","", .)
	replace  isic_2d_name=subinstr( isic_2d_name, "wood and of","", .)
	replace  isic_2d_name=proper( isic_2d_name)
	replace  isic_2d_name="Products of Wood, Cork, and Straw"  if isic_2d_name=="   Products Of Wood And Cork, Except Furniture;  Articles Of Straw And Plaiting Materials"
	replace  isic_2d_name="Leather Luggage, Handbags, Saddlery, Harness And Footwear"  if isic_2d_name==" Tanning And Dressing Of Leather;  Luggage, Handbags, Saddlery, Harness And Footwear"
	eclplot estimate min95 max95   trend, horizontal  ciopts(lcolor(blue)) plot(scatter  trend estimate, msymbol(none) mlabel( isic_2d_name)  mlabposition(12) mlabsize(vsmall)) scheme(Plotplainblind) ytitle("") ylabel(none) xline(0) xsize(6) ysize(4)   xtitle("Effect on `y'")
	****graph export "tex\by_isic2_`y'_th44_test.eps", as(eps) preview(off) replace
	*graph save Graph "tex\by_isic2_`y'889.gph", replace


	
	restore
	}
	}
*----------------------------------------------------------------


stop


********************
*firms per type of kab graph
********************
use regdata_temp_stata13, clear

*hist luck , percent ytitle(Percent of district-year) xtitle(Success rate) 
****graph export tex/hist_luck2.pdf, as(pdf)  replace


gen oil_gas=0
bys kab: egen oil=mean(TOTAL_PRODUCTION)
replace oil_gas=1 if oil>0 & oil!=.
drop if mean_luck==.
replace oil_gas=1 if mean_luck>0 & mean_luck!=.

*collapse (sum) one, by(year oil_gas)
*collapse (sum) output, by(year oil_gas)

*gen wage=ln(output)
*xtset id year
*gen wage_growth=wage-l.wage
*replace wage=wage_growth

reghdfe  wage_rate oil_gas      , absorb( year) cluster( kabupaten )
reghdfe  exit oil_gas      , absorb( year) cluster( kabupaten )
reghdfe  output oil_gas      , absorb( year) cluster( kabupaten )

*twoway( kdensity wage if year>1999 & oil_gas==1) ( kdensity wage if year>1999 & oil_gas==0, lpattern(dash)) , xsize(5) ysize(4) xtitle(Wage rate)  legend(order(1 "Luck in exploration" 2 "No luck")) 

collapse  wage_rate   crude_price_2010_dollars  , by(year oil_gas)
*collapse   exit  crude_price_2010_dollars  , by(year oil_gas)
*collapse    output crude_price_2010_dollars  , by(year oil_gas)

*rename output one
replace wage_rate=ln(wage_rate)

rename wage_rate one
*rename exit one
*rename output one

reshape wide one crude_price_2010_dollars , i(year) j(oil_gas)
gen one00=one0 if year==1990
gen one11=one1 if year==1990
egen one111=max(one11)
egen one000=max(one00)
gen one_zero= (one0/ one000)*100
gen one_one= (one1/ one111)*100
*label var one_one "Luck in exploration" 
*label var one_zero "No luck"

label var one1 "Luck in exploration" 
label var one0 "No luck"
*label var one_one "Oil and Gas" 
*label var one_zero "No Oil and Gas"

*label var one1 "Oil and Gas" 
*label var one0 "No Oil and Gas"

lab var crude_price_2010_dollars0 "Crude oil price in 2010 dollars"
twoway (line one1  year ) (line   one0 year ,  lpattern(dash) xtitle("") ytitle(Average wage)   ) (line crude_price_2010_dollars0  year , lpattern(dot) yaxis(2)) if year<2006

********************

*scatter exog
*use regdata_temp, clear
use regdata_temp_stata13, clear
collapse (sum) wells non_dry TOTAL_PRODUCTION,  by(kab year)
gen luck= non_dry/ wells
gen ln_wells=ln(wells)
lab var luck "Success rate"
lab var wells "Exploration wells"
lab var ln_wells "Exploration wells (log)"
ivreg2 luck ln_wells, cluster(kab)
reg luck ln_wells, cluster(kab)

ivreg2 luck ln_wells, robust

ivreg2 luck ln_wells, cluster(kab )
*ivreg2 luck ln_wells, robust
twoway   (lfitci luck ln_wells) (scatter luck ln_wells) , ytitle(Success rate) legend(off) note(Regression coeff: .014. Std. error clustered by district: .014) 
****graph export tex/scatter_luck_test.eps, as(eps) preview(off) replace
collapse (sum) wells non_dry TOTAL_PRODUCTION,  by(kab)
gen luck= non_dry/ wells
gen ln_wells=ln(wells)
lab var luck "Success rate"
lab var wells "Exploration wells"
lab var ln_wells "Exploration wells (log)"
ivreg2 luck ln_wells, cluster(kab)
reg luck ln_wells, cluster(kab)
gen a_prod=asinh( TOTAL_PRODUCTION)
ivreg2  a_prod luck, cluster(kab)
ivreg2  a_prod luck, robust
lab var luck "Success rate"
twoway   (lfitci luck ln_wells) (scatter luck ln_wells) , ytitle(Success rate) legend(off) note(Regression coeff: .013. Std. error clustered by district: .012) 

twoway   (lfitci a_prod luck) (scatter a_prod luck) , ytitle(Oil and gas production) legend(off) note(Regression coeff: 3.24. Robust s.e.:1.20) 
****graph export tex/scatter_luck_prod_test.eps, as(eps) preview(off) replace




*----------------------------------------------------------------
*July 11 2018: Torf to show that luck is orthogonal to district level variables. 

use regdata_temp_stata13, clear
sum year if  population!=.

collapse (sum) wells non_dry TOTAL_PRODUCTION population,  by(kab year)
gen luck= non_dry/ wells
gen ln_wells=ln(wells)
lab var luck "Success rate"
lab var wells "Exploration wells"
lab var ln_wells "Exploration wells (log)"

gen ln_population=ln(population)

sum year if luck!=.
sum year if ln_wells!=.
sum year if ln_population!=.

ivreg2 luck ln_wells
ivreg2 luck ln_population


*--------------------------------------

use regdata_temp_stata13, clear
gen population_1997_0=population if year==1997
egen population_1997=mean(population_1997), by(kab)
gen grdp_1998=grdp if year==1998

sum year if wells!=.
sum year if non_dry!=.

drop if year<1998
collapse (sum) wells non_dry TOTAL_PRODUCTION (mean) population_1997 grdp_1998,  by(kab)

gen luck= non_dry/ wells
gen ln_population_1997=ln(population_1997)
gen ln_grdp_1998=ln(grdp_1998)

lab var luck "Success rate 1998-2008"
lab var ln_population_1997 "ln population 1997"
lab var ln_grdp_1998 "ln grdp 1998"

save temp_graph_kab_th, replace

*twoway   (lfitci luck ln_population_1997) (scatter luck ln_population_1997) , ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.07. N=56)
ivreg2 luck ln_population_1997, robust
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck ln_population_1997) (scatter luck ln_population_1997) , ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.: 0.07)  xmtick(11(1)16)
***graph export tex/scatter_luck_pop.pdf, replace

use temp_graph_kab_th, clear

ivreg2 luck ln_grdp_1998, robust
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.: 0.04)
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_gdrp.pdf, replace



use regdata_temp_stata13, clear
keep pop grdp  province kabupaten year 
sort  province kabupaten year 
duplicates drop




***the exit stuff************************************************

*-------------------------------------------------------------------------
*Firm and year FE

use regdata_temp, clear
gen X=a_TOTAL_PRODUCTION
label var X "Oil and gas"


gen exit=0
bys id: egen maxyear=max(year)
replace exit=1 if year== maxyear

label var  Z "Max Discovery rate (-3Y)"

gen isic_2=int(isic4/100)

* FIX H thru time, tougher threshold
bys isic_2 year: egen median_prod=median(Y_L)
bys isic_2 year: egen bottom20_prod=pctile(Y_L),p(20)

bys isic_2 year: egen median_empl=median( empl_total)
egen bottom10_empl=pctile( empl_total),p(20)

*small firms of all time in all sectors
egen small10=pctile( empl_total),p(10)

gen H=0
replace H=1 if  empl_total>bottom10_empl

bys id: egen mmm=mean(H)
replace H=0
replace H=1 if mmm==1

lab var H "Top 80\% size"
gen ZH=Z*H
gen XH=X*H
lab var ZH "x Top 80\% size"
lab var XH "x Top 80\% size"

ivreg2 willexit  X  if Z!=.
capture drop e_smpl
predict e_smpl if e(sample), res
xi i.year 


*
eststo clear
*
eststo: xtivreg2  willexit   X _Iyear* if e_smpl!=. , fe ffirst cluster(id kab_year)
eststo: xtivreg2  willexit    (X=Z ) _Iyear* if e_smpl!=., fe ffirst  savefprefix(f2) cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
eststo: xtivreg2  willexit   Z  _Iyear* if e_smpl!=., fe ffirst cluster(id kab_year)

eststo: xtivreg2  willexit   X XH  _Iyear* if e_smpl!=., fe ffirst cluster(id kab_year)
eststo: xtivreg2  willexit    (X XH=Z ZH)  _Iyear* if e_smpl!=., fe ffirst  savefprefix(f4) cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
eststo: xtivreg2  willexit   Z ZH   _Iyear* if e_smpl!=., fe ffirst cluster(id kab_year)

*SAME FOR R==1
*eststo clear
*
eststo: xtivreg2  willexit   X _Iyear* if e_smpl!=. & R==1, fe ffirst cluster(id kab_year)
eststo: xtivreg2  willexit    (X=Z ) _Iyear* if e_smpl!=. & R==1, fe ffirst  savefprefix(f6) cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
eststo: xtivreg2  willexit   Z  _Iyear* if e_smpl!=. & R==1, fe ffirst cluster(id kab_year)

eststo: xtivreg2  willexit   X XH  _Iyear* if e_smpl!=. & R==1, fe ffirst cluster(id kab_year)
eststo: xtivreg2  willexit    (X XH=Z ZH)  _Iyear* if e_smpl!=. & R==1, fe ffirst  savefprefix(f8) cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
eststo: xtivreg2  willexit   Z ZH   _Iyear* if  e_smpl!=. & R==1, fe ffirst cluster(id kab_year)

***esttab using tex\exitssR56_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 
***esttab f2X f4X f4XH f6X f8X f8XH using tex\exitpanel_first2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f) labels (N "R-sq")) nonote drop(*year*) 

*

*----------------------------------------------------------------



*----------------------------------------------------------------
*----------------------------------------------------------------
*----------------------------------------------------------------
******PRODUCTIVITY--------------------------------------------***
*----------------------------------------------------------------
*----------------------------------------------------------------
*----------------------------------------------------------------



*Firm and year FE

use regdata_temp, clear
gen X=a_TOTAL_PRODUCTION
label var X "Oil and gas"


*Dummy for inclusion depending on entry and exit. We now throw out only entry ones, so the number of firms in this group goes down
egen mwillexit=mean(willexit), by(id)
egen mjustentered=mean(justentered), by(id)
egen mjustentered_1=mean(justentered_1), by(id)
egen mjustentered_3=mean(justentered_3), by(id)
egen mjustentered_c=mean(justentered_c), by(id)

gen Dincl=1
replace Dincl=0 if mjustentered>0 & mjustentered!=.
replace Dincl=0 if mjustentered_1>0 & mjustentered_1!=.
replace Dincl=0 if mjustentered_3>0 & mjustentered_3!=.
replace Dincl=0 if mjustentered_c>0 & mjustentered_c!=.
replace Dincl=0 if mwillexit>0 & mwillexit!=.

gen Dnincl=.
replace Dnincl=1 if Dincl==0
replace Dnincl=0 if Dincl==1

*tab D*incl

*survivors
*drop if Dincl==0

*
*-------------------------------------------------------------------------
*Firm and year FE


gen a_price=asinh(uv_core)
replace Y_L=asinh(Y_L)
label var  Z "Max Discovery rate (-3Y)"
lab var a_wage_rate "Wage"
lab var a_empl_total "Empl"
*define a_price
lab var a_price "Price"

ivreg2 a_wage_rate   X  if Z!=.

capture drop e_smpl
predict e_smpl if e(sample), res

gen isic_2=int(isic4/100)


*ivreg2 willexit  X  if Z!=.
*capture drop e_smpl
*predict e_smpl if e(sample), res
xi i.year 


*fuel variables

gen a_fuel=asinh(fuels_tot)
lab var a_fuel "Fuel"
gen a_fuel_va=asinh(fuels_tot/ valadded)
lab var a_fuel_va "Fuel intensity"
gen a_fuel_o=asinh(fuels_tot/ output)
lab var a_fuel_o "Fuel per output"
* fuels_tot_elec
gen a_elec=asinh( elec_plnkWh + elec_nonplnkWh)
gen a_elec_va=asinh((elec_plnkWh + elec_nonplnkWh)/ valadded)
gen a_elec_o=asinh((elec_plnkWh + elec_nonplnkWh)/output)

lab var a_elec_o "Electricty per output"
lab var a_elec_va "Electricty intensity"
lab var a_elec "Electricty"



*labour variables (1)
gen skill_share= 100*(empl_nonprod/ empl_prod)
replace skill_share=. if skill_share>275
lab var skill_share "Non-prod/prod workers"
gen a_S_NS=asinh(skill_share/100)
lab var a_S_NS "Non-prod/prod workers"

*capital variables (6)
gen a_machine=asinh( machine)
gen a_vehicles=asinh( vehicles)
gen a_K=asinh( capitalR )
gen K_L= capitalR / empl_total
gen a_K_L=asinh(K_L)
lab var a_machine "Machines"
lab var a_vehicles "Vehicles"
lab var a_K "Capital stock"
lab var a_K_L "Capital/Labor"
gen a_inv=asinh(inv_total)
lab var a_inv "Investment"
gen capacity=caputil
replace capacity=100 if  caputil>100 & caputil!=.
lab var capacity "Capacity"
gen mch_w=asinh(machine/empl_prod)
gen veh_w=asinh(vehicles/empl_prod)
lab var mch_w "Machines per worker"
lab var veh_w "Vehicles per worker"

*ownership (3)
gen gov_ownership= central_perc+ region_perc
replace gov_o=100 if gov_o>100 & gov_o!=.
lab var gov_o "\% gov onwned"
lab var foreign_perc  "\% foreign onwned"
lab var privdom_perc "\% local onwned"

*product variables (5)
gen a_products=asinh( nb_products)
lab var a_products "Nb of products"
lab var  product_intro "Product intro"
lab var product_drop  "Product drop"
lab var quality_core  "Product quality"
lab var mat_imp_sh "\% inputs imported"
replace mat_imp_sh=mat_imp_sh*100

*export (2)
drop export exporter
gen exporter=0
replace exporter=1 if export_perc>0 & export_perc!=.
lab var exporter "Exporter"

***FUEL

eststo clear
*

foreach x in  a_fuel  a_fuel_o a_elec   a_elec_o{
eststo: xtivreg2  `x'   X _Iyear* if e_smpl!=. , fe ffirst cluster(id kab_year)
}
***esttab using tex\ols_fuel2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq")) nonote drop(*year*) 

eststo clear
foreach x in   a_fuel a_fuel_o a_elec a_elec_o{
eststo: xtivreg2  `x'    (X=Z ) _Iyear* if e_smpl!=., fe ffirst  savefprefix(f1`x') cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
}
*
***esttab using tex\iv_fuel2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in    a_fuel a_fuel_o a_elec a_elec_o {
eststo: xtivreg2  `x'   Z  _Iyear* if e_smpl!=., fe ffirst cluster(id kab_year)
}
*
***esttab using tex\RF_fuel2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" )) nonote drop(*year*) 

*

eststo clear
*

foreach x in  a_fuel a_fuel_o a_elec a_elec_o {
eststo: xtivreg2  `x'   X _Iyear* if e_smpl!=. & R==1 , fe ffirst cluster(id kab_year)
}
***esttab using tex\Rols_fuel2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" )) nonote drop(*year*) 

eststo clear
foreach x in   a_fuel a_fuel_o a_elec a_elec_o{
eststo: xtivreg2  `x'    (X=Z ) _Iyear* if e_smpl!=. & R==1 , fe ffirst  savefprefix(f2`x') cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
}
*
***esttab using tex\Riv_fuel2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in   a_fuel a_fuel_o a_elec a_elec_o {
eststo: xtivreg2  `x'   Z  _Iyear* if e_smpl!=. & R==1 , fe ffirst cluster(id kab_year)
}
*
***esttab using tex\RRF_fuel2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" )) nonote drop(*year*) 

*
***esttab f1a_fuelX  f2a_fuelX    using tex\fuelpanel_first_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f) labels (N "R-sq")) nonote drop(*year*) 


***machines etc...

eststo clear
*
foreach x in  capacity  a_inv  a_machine a_vehicle  gov_o foreign_perc {
eststo: xtivreg2  `x'   X _Iyear* if e_smpl!=. , fe ffirst cluster(id kab_year)
reghdfe  a_inv   (X=Z)  if e_smpl!=., absorb(id year) cluster(id kab_year)
}
***esttab using tex\sols_KL22_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in  capacity  a_inv a_machine a_vehicle gov_o foreign_perc{
eststo: xtivreg2  `x'    (X=Z ) _Iyear* if e_smpl!=., fe ffirst  savefprefix(f1`x') cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
}
*
***esttab using tex\siv_KL22_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in capacity  a_inv  a_machine a_vehicle gov_o foreign_perc{
eststo: xtivreg2  `x'   Z  _Iyear* if e_smpl!=., fe ffirst cluster(id kab_year)
}
*
***esttab using tex\sRF_KL22_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

*


******PRODUCTS******


eststo clear
*
foreach x in   a_products product_intro product_drop quality_core  {
eststo: xtivreg2  `x'   X _Iyear* if e_smpl!=. , fe ffirst cluster(id kab_year)
}
***esttab using tex\sols_ex2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in  a_products product_intro product_drop quality_core  {
eststo: xtivreg2  `x'    (X=Z ) _Iyear* if e_smpl!=., fe ffirst  savefprefix(f2) cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
}
*
***esttab using tex\siv_ex2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in    a_products product_intro product_drop quality_core  {
eststo: xtivreg2  `x'   Z  _Iyear* if e_smpl!=., fe ffirst cluster(id kab_year)
}
*
***esttab using tex\sRF_ex2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

*
*SAME FOR R==1


eststo clear
*
foreach x in  capacity  a_inv  a_machine a_vehicle gov_o foreign_perc{
eststo: xtivreg2  `x'   X _Iyear* if e_smpl!=. &  R==1   , fe ffirst cluster(id kab_year)
}
***esttab using tex\srols_KL22_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in  capacity  a_inv  a_machine a_vehicle gov_o foreign_perc{
eststo: xtivreg2  `x'    (X=Z ) _Iyear* if e_smpl!=. &  R==1  , fe ffirst  savefprefix(f4`x') cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
}
*
***esttab using tex\sriv_KL22_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in capacity  a_inv  a_machine a_vehicle gov_o foreign_perc {
eststo: xtivreg2  `x'   Z  _Iyear* if e_smpl!=. &  R==1  , fe ffirst cluster(id kab_year)
}
*
***esttab using tex\srRF_KL22_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

*



eststo clear
*
foreach x in   a_products product_intro product_drop quality_core  {
eststo: xtivreg2  `x'   X _Iyear* if e_smpl!=. &  R==1   , fe ffirst cluster(id kab_year)
}
***esttab using tex\srols_ex2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in   a_products product_intro product_drop quality_core  {
eststo: xtivreg2  `x'    (X=Z ) _Iyear* if e_smpl!=. &  R==1  , fe ffirst  savefprefix(f3) cluster(id kab_year)
estadd scalar pr1 =el(e(first), 2, 1)
estadd scalar F1 =el(e(first), 3, 1)
}
*
***esttab using tex\sriv_ex2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

eststo clear
foreach x in   a_products product_intro product_drop quality_core  {
eststo: xtivreg2  `x'   Z  _Iyear* if e_smpl!=. &  R==1   , fe ffirst cluster(id kab_year)
}
*
***esttab using tex\srRF_ex2_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F1 pr1 idp, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F instr" "Part. R-sq instr" "Underid. test p")) nonote drop(*year*) 

*

*----------------------------------------------------------------
***esttab f2X f3X  using tex\prodpanel_first_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f) labels (N "R-sq")) nonote drop(*year*) 

***esttab f1capacityX  f1a_invX  f1a_machineX f1gov_oX f4capacityX  f4a_invX  f4a_machineX f4gov_oX    using tex\vitypanel_first_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2, fmt(%9.0g %9.2f) labels (N "R-sq")) nonote drop(*year*) 



*
*IS IV good?
*
use regdata_temp, clear
gen X=a_TOTAL_PRODUCTION
label var X "Oil and gas"
label var  Z "Max Discovery rate (-3Y)"
gen luck=Z
*true digging
xtset id year
*gen l1wells=l.wells
*gen l2wells=l2.wells
*gen l3wells=l3.wells
*gen max_wells= max(l1wells, l2wells,l3wells )
replace luck=. if max_wells==0
*in pure data
gen pure_luck=non_dry/wells


collapse (mean) luck pure_luck max_wells (sum) wells non_dry, by(year  code98)

kdensity  pure_luck, xtitle(Success rate)
gen ln_wells=ln(wells)
reg pure_luck ln_wells i.year i.kab, r
avplot ln_wells
reg pure_luck ln_wells, r
avplot ln_wells
*across kab and years
twoway (scatter  pure_luck  ln_wells , xtitle("") ytitle(Success rate) legend(off) xtitle(Nb of wells (log))) (lfit   pure_luck ln_wells ) if year>1990 & year<2009

*across years
preserve
collapse pure_luck wells, by(year)
lab var pure_luck "Success rate"
lab var  wells "Avg nb of wells"
twoway (bar wells year, xtitle("") ytitle(Avg nb of wells)) (line pure_luck year, yaxis(2)  ytitle(Success rate, axis(2))) if year>1990 & year<2009
restore

preserve
keep if wells>0
collapse pure_luck wells, by(year)
lab var pure_luck "Success rate"
lab var  wells "Avg nb of wells"
twoway (bar wells year, xtitle("") ytitle(Avg nb of wells)) (line pure_luck year, yaxis(2)  ytitle(Success rate, axis(2))) if year>1990 & year<2009
restore

*across Kabs
preserve
collapse pure_luck wells, by(code98)
gen ln_wells=ln(wells)
twoway (scatter  pure_luck  ln_wells , xtitle("") ytitle(Avg success rate) legend(off) xtitle(Avg nb of wells (log))) (lfit   pure_luck ln_wells )
restore


***transfers data
*paste 
drop if coderm==.
drop if coderm==0
rename splitfrom code_old
gen code98= coderm
destring code_old, replace force
replace code98=code_old if code_old!=.
drop coderm regenciesmunicipalities excludeoilgas dum_new yrofcreation code_old d_exoilgas law
drop downstreamtype downstreamtransport
keep revtaxnontax* revshoil* revshgas* grdp* pop* code98
collapse (sum) revtaxnontax* revshoil* revshgas* grdp* pop*, by(code98)
reshape long revtaxnontax revshoil revshgas grdp population, i(code98) j(year)
save transfers, replace



/* Additional analysis Jim - September 2nd 2018 */

*Once integrated I can then look at some of the regs/correlations in the referee comments, namely:
*- Corr between gov revenues and our measure of oil
*- Reg between the discovery rate and a district level measure of institutions (which now we have for some districts)
*- Reg between discovery rate and other district level characteristics (we now have quite a range to choose from)
*- Something on bad roads. I have a few measures of road density/travel time that we could try out

 /*There's a separate question in the ref comments about how 'small open' Indonesia is wrt to the oil price. 
 I'll prob handle that one with the BP Statistical Review data instead 
 - essentially my understanding is that while Indonesia has produced quite a bit of oil, 
 its was a very modest net exporter, and now a net importer, so never moved out of being 
 a price taker at any stage.
*/


/* 1.Correlation between governemet revenues (and windfalls) with our measure of oil */


*use "revenue data here"
use regdata.dta, clear

drop if coderm==.
drop if coderm==0
rename splitfrom code_old
gen code98= coderm
destring code_old, replace force
replace code98=code_old if code_old!=.
drop coderm regenciesmunicipalities excludeoilgas dum_new yrofcreation code_old d_exoilgas law
drop downstreamtype downstreamtransport
keep revtaxnontax* revshoil* revshgas* grdp* pop* code98
collapse (sum) revtaxnontax* revshoil* revshgas* grdp* pop*, by(code98)
reshape long revtaxnontax revshoil revshgas grdp population, i(code98) j(year)
save transfers.dta, replace

use regdata.dta, clear

merge 1:1 code98 using transfers.dta

*Do the correlatoins here

use regdata.dta, clear


drop if year<1998
collapse (sum) wells non_dry OIL_PRODUCTION GAS_PRODUCTION TOTAL_PRODUCTION revtaxnontax revtax mining revoilgas ///
revshnontax revshmine revminetot,  by(kab)

gen luck= non_dry/ wells
gen lrevoilgas=ln(revoilgas)
gen lrevshnontax=ln(revshnontax)
gen lrevtaxnontax=ln(revtaxnontax)
gen lprod=ln(TOTAL_PRODUCTION)

/* Corr windfalls and luck */
label var lrevtaxnontax "Log government revenues"
label var lprod "Log oil and gas production"
lab var luck "Success rate 1998-2008"
save windfalls.dta, replace


use windfalls.dta, clear



reg  lrevtaxnontax luck, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci  lrevtaxnontax luck) (scatter  lrevtaxnontax luck, msymbol(circle_hollow) mcolor(navy)), xtitle(Success rate 1998-2008) ytitle(Log government revenues) legend(off) note(Regression coeff: 1.27. Robust s.e.: .65) graphregion( color(white) ) xsize(4.5) ysize(4) scheme(plotplainblind)
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_windfalls.pdf, replace


/* Corr windfalls and production */



use windfalls.dta, clear



reg  lrevtaxnontax lprod, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci lprod lrevtaxnontax) (scatter lprod lrevtaxnontax, msymbol(circle_hollow) mcolor(navy)), xtitle(Log oil and gas production) ytitle(Log government revenues) legend(off) note(Regression coeff: 0.38. Robust s.e.: 0.12) graphregion( color(white) ) xsize(4.5) ysize(4) scheme(plotplainblind)
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_prod_windfalls.pdf, replace





/* 2 and 3. Reg between discovery rate and other district level characteristics (we now have quite a range to choose from) */
/* 2. Reg between the discovery rate and a district level measure of institutions (which now we have for some districts) */



/* District level controls
GRDP - GDP at district level
Population - district ppulatoin
city - dummy if a municipality
dum_new - dummy fi the district is a newly created one
egi_combinedindex - Economic and Governance Index- a time invariant measure of district insituttoins
ACC_P2_median "Market Access-Travel time to major cities (>50k) (Nelson & Uchida, 2008), median"
ACC_G1_median "Market Access-Travel time to major cities (>50k) (Nelson & Uchida, 2008), median"
ROAD_DensHighway_P2 "Road Density All types GRIP (Global Roads Inventory Project) 2013, lengths in km"
ROAD_DensTotal_P2 "Road Density Highways GRIP (Global Roads Inventory Project) 2013, lengths in km "
BUILT_pct_P2 "Built up area 2012, GUF, % of total area"
*/


*use regdata_temp_stata13, clear
use regdata.dta, clear


gen population_1997_0=population if year==1997
egen population_1997=mean(population_1997), by(kab)
gen grdp_1998=grdp if year==1998

sum year if wells!=.
sum year if non_dry!=.

drop if year<1998
collapse (sum) wells non_dry TOTAL_PRODUCTION (mean) BUILT_pct_P2 ROAD_DensTotal_P2 ROAD_DensHighway_P2 ACC_G1_median ACC_P2_median city egi_combinedindex population_1997 grdp_1998,  by(kab)

gen luck= non_dry/ wells
gen ln_population_1997=ln(population_1997)
gen ln_grdp_1998=ln(grdp_1998)

lab var luck "Success rate 1998-2008"
lab var ln_population_1997 "ln population 1997"
lab var ln_grdp_1998 "ln grdp 1998"

save temp_graph_kab_th, replace

*twoway   (lfitci luck ln_population_1997) (scatter luck ln_population_1997) , ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: .057. Robust s.e.:0.07. N=56)
ivreg2 luck ln_population_1997, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck ln_population_1997) (scatter luck ln_population_1997, msymbol(circle_hollow) mcolor(navy) ) , ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.05. Robust s.e.: 0.07)  xmtick(11(1)16) ///
 graphregion( color(white) )
***graph export tex/scatter_luck_pop.pdf, replace

use temp_graph_kab_th, clear

ivreg2 luck ln_grdp_1998, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998, msymbol(circle_hollow) mcolor(navy)), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.: 0.04) graphregion( color(white))
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_gdrp.pdf, replace


/* Institutions */
*egi_combinedindex
use temp_graph_kab_th, clear
label var egi_combinedindex "Economic and Governance Index"
ivreg2 luck egi_combinedindex, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck egi_combinedindex) (scatter luck egi_combinedindex, msymbol(circle_hollow) mcolor(navy)), ytitle(Success rate 1998-2008) xsize(5) ysize(4.5) legend(off) note(Regression coeff:  -.005. Robust s.e.: 0.06) scheme(Plotplainblind)
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_institutions.pdf, replace

/* Now do a scatter of exploration and institutions */
use temp_graph_kab_th, clear
label var egi_combinedindex "Economic and Governance Index"
replace wells=0 if wells==.
*replace wells=wells+1 if wells==0
gen lwells=ln(wells)
ivreg2 lwells egi_combinedindex, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci lwells egi_combinedindex) (scatter lwells egi_combinedindex, msymbol(circle_hollow) mcolor(navy)), ytitle(Drilling count 1998-2008) legend(off) note(Regression coeff:  0.013 Robust s.e.: 0.035)  graphregion( color(white) )
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_lwells_institutions.pdf, replace





/* OTher urban controls */
*BUILT_pct_P2   city
use temp_graph_kab_th, clear
gen built=ln(BUILT_pct_P2)

ivreg2 luck built, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck built) (scatter luck built, msymbol(circle_hollow) mcolor(navy)), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.04. Robust s.e.: 0.03)  graphregion( color(white) )
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_built.pdf, replace


use temp_graph_kab_th, clear
ivreg2 luck city, cluster(kab)
label var city "Urban dummy"
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck city) (scatter luck city, msymbol(circle_hollow) mcolor(navy)), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff:  -0.03. Robust s.e.: 0.10) graphregion( color(white) )
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_city.pdf, replace

/* 4. Something on bad roads. I have a few measures of road density/travel time that we could try out */

/* Bad roads */
*ROAD_DensTotal_P2 ROAD_DensHighway_P2 ACC_G1_median ACC_P2_median

use temp_graph_kab_th, clear
gen roads=ln(ROAD_DensTotal_P2)
label var roads "Road density"
ivreg2 luck roads, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck roads) (scatter luck roads, msymbol(circle_hollow) mcolor(navy)), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.06. Robust s.e.: 0.04) scheme(Plotplainblind)
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_roadsdensity.pdf, replace



use temp_graph_kab_th, clear
gen roads2=ln(ROAD_DensHighway_P2)
label var roads2 "Highway density"
ivreg2 luck roads, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck roads2) (scatter luck roads2, msymbol(circle_hollow) mcolor(navy)), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.13. Robust s.e.: 0.08) graphregion( color(white) )
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_highwaydensity.pdf, replace


use temp_graph_kab_th, clear
gen roads3=ln(ACC_G1_median)
label var roads3 "Travel time to major cities (>50k), median (Nelson & Uchida, 2008)"
ivreg2 luck roads3, cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck roads3) (scatter luck roads3, msymbol(circle_hollow) mcolor(navy)), xtitle("Travel time to major cities (>50k), median""(Nelson & Uchida, 2008)") ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.02. Robust s.e.: 0.04) xsize(5) ysize(4.5) scheme(Plotplainblind)
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex\scatter_luck_roadaccess.pdf, replace



use temp_graph_kab_th, clear
gen roads4=ln(ACC_P2_median)

ivreg2 luck roads4, robust
predict e_smpl if e(sample), res
keep if e_smpl!=.

twoway   (lfitci luck roads4) (scatter luck roads4, msymbol(circle_hollow) mcolor(navy)), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: -0.03. Robust s.e.: 0.03) graphregion( color(white) )
*twoway   (lfitci luck ln_grdp_1998) (scatter luck ln_grdp_1998), ytitle(Success rate 1998-2008) legend(off) note(Regression coeff: 0.03. Robust s.e.:0.04. N=57)
***graph export tex/scatter_luck_roadaccess2.pdf, replace

/* Luck relationship */


**Do multivariate and LASSO model next

*ssc install lassopack
*ssc install elasticregress

use temp_graph_kab_th, clear
gen built=ln(BUILT_pct_P2)
gen roads=ln(ROAD_DensTotal_P2)
gen roads2=ln(ROAD_DensHighway_P2)
gen roads3=ln(ACC_G1_median)
gen roads4=ln(ACC_P2_median)
label var ln_grdp_1998 "Log GDP 98"
label var ln_population_1997 "Log Pop 97"
label var egi_combinedindex "Institutions"
label var built "Built up environment"
label var city "Urban dummy"
label var roads "Road density"
label var roads2 "Highway density"
label var roads3 "Travel time"
label var luck "Luck"
su luck ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3
eststo clear

eststo: reg luck ln_grdp_1998 ln_population_1997, cluster(kab)


eststo: reg luck ln_grdp_1998 ln_population_1997 egi_combinedindex, cluster(kab)


eststo: reg luck ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3 , cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

*eststo: lassoregress luck ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3, lambda(0.02)

eststo: lassoregress luck ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3


***esttab using tex\district_multi_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F lambda, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F" "Lambda")) nonote drop() 
***esttab using tex\district_multi.csv, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F lambda, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F" "Lambda")) nonote drop() 


eststo clear
lars luck ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3, a(lasso)        g

/* Exploration relationship */

use temp_graph_kab_th, clear
gen built=ln(BUILT_pct_P2)
gen roads=ln(ROAD_DensTotal_P2)
gen roads2=ln(ROAD_DensHighway_P2)
gen roads3=ln(ACC_G1_median)
gen roads4=ln(ACC_P2_median)
gen lwells=ln(wells)
label var wells "Wells"
label var lwells "Log Wells"
label var ln_grdp_1998 "Log GDP 98"
label var ln_population_1997 "Log Pop 97"
label var egi_combinedindex "Institutions"
label var built "Built up environment"
label var city "Urban dummy"
label var roads "Road density"
label var roads2 "Highway density"
label var roads3 "Travel time"
label var luck "Luck"

eststo clear

eststo: reg lwells ln_grdp_1998 ln_population_1997, cluster(kab)

eststo: reg lwells ln_grdp_1998 ln_population_1997 egi_combinedindex, cluster(kab)

eststo: reg lwells ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3 , cluster(kab)
predict e_smpl if e(sample), res
keep if e_smpl!=.

*eststo: lassoregress lwells ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3, lambda(0.02)

eststo: lassoregress lwells ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3

***esttab using tex\district_multi_wells_test.tex,, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F lambda, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F" "Lambda")) nonote drop() 
***esttab using tex\district_multi_wells.csv, replace label nogaps se b(%9.3f) star(* 0.10 ** 0.05 *** 0.01) stats(N r2 F lambda, fmt(%9.0g %9.2f %9.2f %9.2f %9.2f %9.2f) labels(N "R-sq" "F" "Lambda")) nonote drop() 



eststo clear

lars lwells ln_grdp_1998 ln_population_1997 egi_combinedindex built roads roads3, a(lasso)        g
